/*******************************************************************************
* Long-term effects of weather-induced migration on urban labor and housing 
  markets
* Busso & Chauvin

* Purpose: Calculate the effect of precipitation on rural emigration.

*******************************************************************************/

/*******************************************************************************
	ENVIRONMENT
*******************************************************************************/
/* Geographic units */

* Rural origin: MCA identifier variable
global mca "mca8010"

* Rural-urban location identifier (urban destination code); stored under two
* global names, of which only mcarp is referenced in the code below
global mcarp "mcarp8010"
global geo   "mcarp8010"

/* Migrant information */
* Working-age window (inclusive bounds), applied below to the age at migration
global min_age 15
global max_age 64

* Rural-level control variables entering the first-stage regressions
global controls_rur "iag_sh isv_sh igv_sh sh_emp_col pop_growth8091"

/*******************************************************************************
	1.  RURAL CONTROLS
*******************************************************************************/
/* Population */
* Census years used here (two-digit suffixes of the Census_YY files)
global years "80 91"

* The zipped census files are read from the census_harm folder; moving there
* once is enough, the loop never changes directory
cd "${census_harm}"

foreach y in $years {
	* Extract the census file. -replace- lets the script be re-run after an
	* interrupted run left a previously extracted copy behind (the extracted
	* files are only erased at the very end of this do-file)
	unzipfile Census_`y', replace

	* Load only the person weight and the municipality code
	use wtper muni using "Census_`y'", clear

	* Attach the MCA code; municipalities absent from the crosswalk are kept
	* (keep(match master)) and end up with a missing mca8010
	merge m:1 muni using "${db_inp}/geo_codes_sat_cities.dta", nogen ///
		keepusing(mca8010) keep(match master)

	* Weighted head count per MCA
	gen pop_all`y' = 1
	collapse (sum) pop_all`y' [pw = wtper], by(mca8010)

	* One temporary file per census year, combined right after the loop
	tempfile pop_all`y'
	save `pop_all`y''
}

* Put the two census head counts side by side, one row per MCA
use `pop_all80', clear
merge 1:1 mca8010 using `pop_all91', nogenerate

* Log population in each census year, then log growth between the two
foreach y in 80 91 {
	generate l_pop_all`y' = ln(pop_all`y')
}
generate pop_growth8091 = l_pop_all91 - l_pop_all80

* Only the MCA code, log population of the later census and the growth rate
* are carried forward
drop pop_all80 pop_all91 l_pop_all80

save "${db_tmp}/pop_growth_8010_rural", replace

/* Other variables */
* Load 2010 Census. -replace- on unzipfile allows re-running after an
* interrupted run left an extracted copy behind
cd "${census_harm}"
unzipfile Census_10, replace
use wtper muni schcomp employed workage brind using "Census_10", clear
merge m:1 muni using "${db_inp}/geo_codes_sat_cities.dta", nogen ///
	keepusing(mca8010 mcarp8010) keep(match master)

* Keep only rural observations
keep if mcarp8010 == 1000000

* Restrict the employment indicator to the working-age sample
replace employed = . if workage == 0

* 0/1 indicators defined only for the employed (missing otherwise), so that
* their weighted mean is a share among the employed.
* NOTE(review): schcomp == 5 is presumably completed college and brind codes
* 1 / 3 / 4 presumably agriculture / services / government, judging from the
* variable names -- confirm against the harmonized census codebook
gen sh_emp_col = (schcomp == 5) if employed == 1 & schcomp != .

* Main industry variables
gen iag_sh = (brind == 1) if brind != . & employed == 1
gen igv_sh = (brind == 4) if brind != . & employed == 1
gen isv_sh = (brind == 3) if brind != . & employed == 1

* Collapse to MCA-level shares. The statistic must be (mean): (sum) would
* return weighted head counts, not the shares the variable names announce.
* An MCA with no employed respondent gets a missing share.
collapse (mean) iag_sh isv_sh igv_sh sh_emp_col [pw = wtper], by(mca8010)

/* Merge in the population variables */
merge 1:1 mca8010 using "${db_tmp}/pop_growth_8010_rural", ///
	nogen keep(match master)
save "${db_tmp}/controls_8010_rural", replace

* The population file is now embedded in the controls file
rm "${db_tmp}/pop_growth_8010_rural.dta"

/*******************************************************************************
	2.  STORE TOTAL POPULATION (1991 CENSUS) TO CALCULATE THE EMIG. SHARES
*******************************************************************************/
/* 1991 Census data */
use muni wtper using "${census_harm}/Census_91", clear

* Attach the geographic units, keeping matched municipalities only. Only the
* rural origins matter here: their total population per MCA is the
* denominator of the emigration shares
merge m:1 muni using "${db_inp}/geo_codes_sat_cities.dta", ///
	keep(match) keepusing(${mcarp} ${mca}) nogenerate

* Keep rural places only
keep if ${mcarp} == 1000000

* Counter equal to one per respondent, so the weighted sum is a head count
generate pop_all = 1

* One row per rural MCA
collapse (sum) pop_all [pweight = wtper], by(${mca})

* Held in a temporary file until the emigration shares are computed
tempfile rural_pop_1991
save `rural_pop_1991'

/*******************************************************************************
	3.  CLEAN 2010 CENSUS DATA
*******************************************************************************/
/* Restrict the 2010 Census to the observations of interest */
use muni wtper mgprevmuni age mgyrsmuni using "${census_harm}/Census_10", clear

* Require both the previous municipality and the years of residence in the
* current municipality to be reported
keep if mgprevmuni != . & mgyrsmuni != .

* Age at the time of migration; keep those who moved while of working age
* (observations with a missing age at migration are dropped as well)
generate age_mig = age - mgyrsmuni
keep if inrange(age_mig, $min_age, $max_age)

/* Destination: remove non-urban places */
* Attach the rural-urban identifier of the municipality of residence
merge m:1 muni using "${db_inp}/geo_codes_sat_cities.dta", nogenerate ///
	keep(match) keepusing(${mcarp})

* 1000000 is the rural code, so everything else is an urban destination
keep if ${mcarp} != 1000000
drop ${mcarp}

/* Origin: remove non-rural places */
* The crosswalk is keyed on muni, so the previous municipality takes that
* name while the current one is parked in muni2
rename (muni mgprevmuni) (muni2 muni)

* Attach both the MCA and the rural-urban identifier of the origin
merge m:1 muni using "${db_inp}/geo_codes_sat_cities.dta", ///
	nogenerate keep(match) keepusing(${mcarp} ${mca})

* Keep rural origins, after which the identifier is no longer needed
keep if ${mcarp} == 1000000
drop ${mcarp}

/* Counter equal to one per migrant, summed when the data are collapsed */
generate pop_mig = 1

tempfile rural_mig_2010
save `rural_mig_2010'

/*******************************************************************************
	4.  2001-2010 MIGRANTS
*******************************************************************************/
/* Collapse number of migrants down to mca-level: 2001-2010 migrants */
* Start from the cleaned 2010 migrant file
use `rural_mig_2010', clear

* Drop anyone with more than 9 years in the current municipality, i.e. keep
* the migrants of the period named in the section title
drop if mgyrsmuni > 9

* Weighted number of migrants per rural MCA of origin
collapse (sum) pop_mig [pweight = wtper], by(${mca})

* Bring in the 1991 rural population of each MCA as the denominator.
* NOTE(review): unmatched rows from either side are kept, so an MCA present
* only in the 1991 file has a missing (not zero) pop_mig and therefore a
* missing emig_rate -- confirm this is intended
merge 1:1 ${mca} using `rural_pop_1991', nogenerate

* Emigration share per rural MCA
generate emig_rate = pop_mig / pop_all

* Year = end of the migration period
generate year = 2010

* Written to disk for the merge with the drought index
save "${db_tmp}/emig_rate", replace

/*******************************************************************************
	5.  MERGE WITH THE DROUGHT INDEX, AGRICULTURAL SHARES
*******************************************************************************/
* Start from the drought index
use "${db_tmp}/drought_index_spei8010.dta", clear

* Add the emigration rates, keeping only MCAs present in both files
merge 1:m ${mca} using "${db_tmp}/emig_rate", keep(match) nogenerate

* The numerator and denominator are not needed once the rate exists
drop pop_mig pop_all

* Express the emigration rate in percent
replace emig_rate = 100 * emig_rate

* Add the rural controls; unmatched rows from either side are kept
merge m:1 mca8010 using "${db_tmp}/controls_8010_rural", nogenerate

* Macro-region code: the MCA code divided by one million, rounded down
generate macroreg = floor(mca8010/1000000)

* Absolute value of the drought index, used as an alternative regressor
generate abs_spei = abs(drought_dws0009)

/*******************************************************************************
	6.  FIRST-STAGE AT THE RURAL LEVEL
*******************************************************************************/
* For each weather regressor, three specifications are estimated, all with
* macro-region fixed effects:
*   (1) weather regressor only
*   (2) adds the rural controls
*   (3) adds the rural controls and log 1991 population
* Results are stored in a 4 x 6 matrix. Specification s owns column 2s-1
* (rows 1-4: coefficient, std. error, N, mean of the outcome) and column 2s
* (row 1: p-value). The matrix is then saved as a dataset.
* NOTE(review): the output folder is the global da_tmp, whereas every other
* path in this file uses db_tmp -- confirm da_tmp is defined by the caller
foreach x in "drought_dws0009" "abs_spei" {

	preserve

	* File-name suffix: none for the raw index, _abs for its absolute value
	local cond ""
	if "`x'" == "abs_spei" local cond "_abs"

	* Regressors added on top of the weather variable in each specification
	local extra1 ""
	local extra2 "$controls_rur"
	local extra3 "$controls_rur l_pop_all91"

	matrix tmat = J(4, 6, .)

	forvalues s = 1/3 {
		* Columns holding this specification's estimates and p-value
		local c = 2*`s' - 1
		local pc = `c' + 1

		reg emig_rate `x' `extra`s'' i.macroreg

		* The weather variable is the first regressor, hence column 1 of
		* r(table); rows 1, 2 and 4 are the coefficient, std. error, p-value
		matrix tmat[1, `c'] = r(table)[1, 1]
		matrix tmat[2, `c'] = r(table)[2, 1]
		matrix tmat[1, `pc'] = r(table)[4, 1]

		* Sample size and outcome mean over the estimation sample
		qui summ emig_rate if e(sample) == 1
		matrix tmat[3, `c'] = r(N)
		matrix tmat[4, `c'] = r(mean)
	}

	* Turn the matrix into a dataset and save it
	clear
	svmat2 tmat, rnames(var) names(col)
	save "$da_tmp/first_stage_rural`cond'.dta", replace

	restore

}

* Delete the intermediate files this do-file wrote to the temporary folder
foreach f in emig_rate controls_8010_rural {
	erase "${db_tmp}/`f'.dta"
}

* Delete the census files extracted from the zip archives
cd "${census_harm}"
foreach y in 80 91 10 {
	erase "Census_`y'.dta"
}
